library(Seurat)
library(RColorBrewer)
library(tidyverse)
library(MetBrewer)
library(cowplot)
library(patchwork)
library(DescTools)
library(scales)
library(ggrepel)
library(ggpubr)
library(ComplexHeatmap)
library(circlize)
library(msigdbr)
library(clusterProfiler)
library(enrichplot)
library(org.Rn.eg.db)
library(GOSemSim)
library(ggtree)
library(ggdendro)
library(viridis)
library(aplot)
library(ggtext)
source("util.R")
remote_dir <- "/Volumes/TOSHIBA/molpat/APECED_rats/analysis/single_cell/sc_rat_spleen/data/"
seurat_obj <- readRDS(paste0(remote_dir, "integrated_seurat_object.rds"))

5A

col_vec <- colorRampPalette(brewer.pal(9, "Set1"))(21)[1:18]
color_mapping <- data.frame(Cell.type = levels(seurat_obj$cell.type),
                            Color = col_vec)

umap_plot <- DimPlot(seurat_obj,
        label = TRUE,
        label.size = 5,
        cols = col_vec,
        group.by = "cell.type.idx") +
  theme(legend.position = "none") +
  labs(subtitle = NULL, title = NULL)

legend <- DimPlot(seurat_obj,
                group.by = "legend",
                cols = col_vec,
                label = TRUE)

legend <- plot_grid(get_legend(legend))

umap_plot + legend

5B

ego_s <- read_tsv("data/Figure_5/GO_simplified.tsv")
## Rows: 38 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): ID, Description, GeneRatio, BgRatio, geneID
## dbl (4): pvalue, p.adjust, qvalue, Count
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ego_s <- as_tibble(ego_s)
ego_s$GeneRatio <- sapply(ego_s$GeneRatio, function(x) {eval(parse(text=x))})
ego_s <- mutate(ego_s, log10p = -log10(p.adjust), DescriptionTrunc = StrTrunc(Description, maxlen = 50, ellipsis = "...", wbound = FALSE))

P_B <- ggplot(head(ego_s, 15),
       aes(x = GeneRatio, y = reorder(DescriptionTrunc, GeneRatio), 
                        color = `log10p`, size = Count)) + 
  geom_point() +
  geom_point(aes(size=Count), shape = 21, colour="black", stroke=0.5) +
  scale_size(range = c(0, 4)) +
  scale_colour_gradient2(
    low = ("white"),
    mid = ("white"),
    high = scales::muted("red"),
    midpoint = -log10(0.05)
  ) +
  ylab("") + 
  xlab("GeneRatio") + 
  theme_cowplot() +
  coord_cartesian(xlim=c(0, 0.2)) +
  scale_y_discrete(position = "right") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

P_B

5C

de.results <- read_tsv("data/Figure_5/benoist_DE_results.tsv")
## Rows: 42690 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Gene.name, Cell.type, description
## dbl (6): logFC, logCPM, F, PValue, FDR, DE
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
tonic_effect_processed <- read_tsv("data/Figure_5/tonic_effect_processed.txt")
## Rows: 548 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): GeneSymbol, Cell.type, ortholog_name, Group
## dbl (4): ProbeSetID, IFN.FC, IFNAR_KO.FC, IFNAR_KO.pval
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
de.results <- de.results %>%
  left_join(tonic_effect_processed %>% dplyr::select(ortholog_name, Group) %>% distinct(), by = c("Gene.name" = "ortholog_name")) %>%
  mutate(DE = if_else(FDR < .05 & logFC > 1, "Upregulated", "Not changed"),
             DE = if_else(FDR < .05 & logFC < -1, "Downregulated", DE)) %>%
  replace_na(list(Group = "Unknown"))
## Warning in left_join(., tonic_effect_processed %>% dplyr::select(ortholog_name, : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 4 of `x` matches multiple rows in `y`.
## ℹ Row 22 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
B_ORA <- fisher.test(as.numeric(de.results[de.results$Cell.type=="B",]$DE=="Downregulated"), as.numeric(de.results[de.results$Cell.type=="B",]$Group=="Tonic-sensitive"))
MF_ORA <- fisher.test(as.numeric(de.results[de.results$Cell.type=="MF",]$DE=="Downregulated"), as.numeric(de.results[de.results$Cell.type=="MF",]$Group=="Tonic-sensitive"))

p1 <- plot_tonic_ISGs(de.results[de.results$Cell.type=="B",]) + ggtitle("B cell") + annotate("text", x = -5.5, y = 7.2, hjust=0, col="black", label = paste0("OR=",round(B_ORA$estimate, 1),"\np=",format(B_ORA$p.value, digits = 2)))
p2 <- plot_tonic_ISGs(de.results[de.results$Cell.type=="MF",]) + ggtitle("Macrophage") + annotate("text", x = -5.5, y = 7.2, hjust=0, col="black", label = paste0("OR=",round(MF_ORA$estimate, 1),"\np=",format(MF_ORA$p.value, digits = 2)))

P_C <- p1 + p2

P_C
## Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

5D

tonic_effect_processed <- read_tsv("data/Figure_5/tonic_effect_processed.txt")
## Rows: 548 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): GeneSymbol, Cell.type, ortholog_name, Group
## dbl (4): ProbeSetID, IFN.FC, IFNAR_KO.FC, IFNAR_KO.pval
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cell_type <- seurat_obj$cell.type
levels(cell_type) <- c("T4",  "T4", "T8", "T8", "NK", "NK", "B", "B", "B", "B", "B", "B",  "MF", "MF", "MF", "DC", "DC", "GN")
seurat_obj$cell.type.lvl1 <- cell_type

celltype_vec <- levels(seurat_obj$cell.type.lvl1)
seurat_obj$celltype.gntp <- paste(seurat_obj$cell.type.lvl1, seurat_obj$genotype, sep = "_")
seurat_obj$celltype.gntp <- ordered(seurat_obj$celltype.gntp, levels = paste(rep(celltype_vec, each = 2), rep(c("HE", "KO"), length(celltype_vec)), sep = "_"))
Idents(seurat_obj) <- "celltype.gntp"

DefaultAssay(seurat_obj) <- "RNA"
dge_df = data.frame()
for (i in unique(celltype_vec)) {
  dge_df_sub <- FoldChange(seurat_obj, ident.1 = paste(i, "_KO", sep=""), ident.2 = paste(i, "_HE", sep=""), verbose = TRUE) %>%
    rownames_to_column("gene") %>%
    mutate(Cell.type = i)
  dge_df <- rbind(dge_df, dge_df_sub)
}

mat <- dge_df %>%
  dplyr::select(-c(pct.1, pct.2)) %>%
  pivot_wider(names_from = "Cell.type", values_from = "avg_log2FC") %>%
  column_to_rownames("gene")

B

data_to_plot <- tonic_effect_processed %>%
  filter(Cell.type=="B") %>%
  dplyr::select(ortholog_name, IFNAR_KO.FC, Group) %>%
  drop_na(ortholog_name) %>%
  left_join(dge_df %>% filter(Cell.type == "B") %>% dplyr::select(gene, avg_log2FC), by=c("ortholog_name"="gene")) %>%
  mutate(avg_FC = 2^avg_log2FC, IFNAR_KO.FC = 1/IFNAR_KO.FC) %>%
  drop_na(avg_log2FC)

col_vec <- c("#E41A1C",
               "#d3d3d3",
               "#377EB8")
names(col_vec) <- c("Tonic-sensitive", "NA", "Tonic-insensitive")
lab_df <- filter(data_to_plot, Group == "Tonic-sensitive")
cor_test <- cor.test(data_to_plot$IFNAR_KO.FC, data_to_plot$avg_FC, method = "spearman")
## Warning in cor.test.default(data_to_plot$IFNAR_KO.FC, data_to_plot$avg_FC, :
## Cannot compute exact p-value with ties
p_D1 <- ggplot(data_to_plot, aes(x=IFNAR_KO.FC, y=avg_FC, color=Group, label = ortholog_name)) +
  geom_point(size=1) +
  theme_cowplot(16) +
  geom_hline(yintercept = 1, lty = 2) +
  geom_vline(xintercept = 1, lty = 2) +
  labs(x = "FC (Ifnar1 KO / Ctrl)", y = "FC (Aire KO / Ctrl)") +
  geom_text_repel(data = lab_df,
                  aes(x = IFNAR_KO.FC, y = avg_FC, label = ortholog_name),
                  min.segment.length = 1, seed = 3,
                  box.padding = 1,
                  show.legend = FALSE,
                  max.overlaps =33,
                  force_pull=2,
                  force=100,
                  size=5
  ) +
  annotate("text", x = 0.1, y = 5, hjust = 0, 
           label = paste0("rho=", round(cor_test$estimate, 2), 
                          "\n", "p=", format(cor_test$p.value, digits=3))) +
  scale_color_manual(values = col_vec) +
  scale_x_log10() +
  scale_y_log10() +
  theme(legend.position = "none") +
  ggtitle("B cell")

MF

data_to_plot <- tonic_effect_processed %>%
  filter(Cell.type=="MF") %>%
  dplyr::select(ortholog_name, IFNAR_KO.FC, Group) %>%
  drop_na(ortholog_name) %>%
  left_join(dge_df %>% filter(Cell.type == "MF") %>% dplyr::select(gene, avg_log2FC), by=c("ortholog_name"="gene")) %>%
  mutate(avg_FC = 2^avg_log2FC, IFNAR_KO.FC = 1/IFNAR_KO.FC) %>%
  drop_na(avg_log2FC)

col_vec <- c("#E41A1C",
               "#d3d3d3",
               "#377EB8")
names(col_vec) <- c("Tonic-sensitive", "NA", "Tonic-insensitive")
lab_df <- filter(data_to_plot, Group == "Tonic-sensitive")
cor_test <- cor.test(data_to_plot$IFNAR_KO.FC, data_to_plot$avg_FC, method = "spearman")
## Warning in cor.test.default(data_to_plot$IFNAR_KO.FC, data_to_plot$avg_FC, :
## Cannot compute exact p-value with ties
p_D2 <- ggplot(data_to_plot, aes(x=IFNAR_KO.FC, y=avg_FC, color=Group, label = ortholog_name)) +
  geom_point(size=1) +
  theme_cowplot(16) +
  geom_hline(yintercept = 1, lty = 2) +
  geom_vline(xintercept = 1, lty = 2) +
  labs(x = "FC (Ifnar1 KO / Ctrl)", y = "FC (Aire KO / Ctrl)") +
  geom_text_repel(data = lab_df,
                  aes(x = IFNAR_KO.FC, y = avg_FC, label = ortholog_name),
                  min.segment.length = 1, seed = 10,
                  box.padding = 1,
                  show.legend = FALSE,
                  max.overlaps =25,
                  force_pull=100,
                  force=100,
                  nudge_x=.2,
                  size=5
  ) +
  annotate("text", x = 0.05, y = 10, hjust = 0, 
           label = paste0("rho=", round(cor_test$estimate, 2), 
                          "\n", "p=", format(cor_test$p.value, digits=3))) +
  scale_color_manual(values = col_vec) +
  scale_x_log10() +
  scale_y_log10() +
  theme(legend.position = "none") +
  ggtitle("Macrophage")
p_D <- p_D1 + p_D2

p_D
## Warning: ggrepel: 45 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 40 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

5E

tonic_effect <- read_tsv("data/Figure_5/tonic_effect_processed.txt")
## Rows: 548 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): GeneSymbol, Cell.type, ortholog_name, Group
## dbl (4): ProbeSetID, IFN.FC, IFNAR_KO.FC, IFNAR_KO.pval
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
de_data <- read_tsv("data/Figure_5/Tcells_DE_data.tsv")
## Rows: 296380 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): ID, SYMBOL, Cell.type, Site, Group
## dbl (8): KO.log2EXP, HET.log2EXP, FC, P.VAL, FDR, DE, ISG, Log2FC
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
exprs_mtx <- read_tsv("data/Figure_5/exprs_mtx.txt")
## Rows: 17447 Columns: 71
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (1): SYMBOL
## dbl (70): A01_Het-P8-7_LN-Tconv, A02_Het-P11-9_LN-Tconv, A03_Het-P10-4_LN-Tc...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
exprs_mtx$SYMBOL <- NULL

de_data <- de_data %>%
  filter(Site != "Pooled", ISG == 1) %>%
  dplyr::select(SYMBOL, Log2FC, Site, Cell.type) %>%
  mutate(Group = paste(Site, Cell.type, sep = "_")) %>%
  dplyr::select(-c(Site, Cell.type)) %>%
  distinct(SYMBOL, Group, .keep_all = T) %>%
  pivot_wider(names_from = Group, values_from = Log2FC) %>%
  column_to_rownames("SYMBOL") %>%
  drop_na() %>%
  as.matrix()
  
col_names <- colnames(de_data)
colnames(de_data) <- str_split(col_names, "_", Inf, T)[,2]
tonic_effect_sub <- tonic_effect[match(rownames(de_data), tonic_effect$ortholog_name),] %>%
  replace_na(list(Group = "No data"))

set.seed(42)
cl <- kmeans(de_data, centers = 3)$cluster
colAnn <- HeatmapAnnotation(Tonicity = tonic_effect_sub$Group,
                            which = 'col',
                            col = list(Tonicity = c("Tonic-sensitive" = "#E41A1C", "Tonic-insensitive" = "#377EB8", "No data"="white")),
                            simple_anno_size = unit(.5, "cm"),
                            gap = unit(1, 'cm'),
                            show_annotation_name = T)

P_E = Heatmap(t(de_data),
        name = "log2FC",
        #col = colorRamp2(seq(-3, 3, length.out=9), rev(brewer.pal(n=9, name="Spectral"))),
        row_split = factor(str_split(col_names, "_", Inf, T)[,1]),
        column_split = cl,
        border=T,
        show_row_dend = F,
        show_column_dend = F,
        show_column_names = F,
        top_annotation = colAnn)

P_E

5F

de_data <- read_tsv("data/Figure_5/Tcells_DE_data.tsv")
## Rows: 296380 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): ID, SYMBOL, Cell.type, Site, Group
## dbl (8): KO.log2EXP, HET.log2EXP, FC, P.VAL, FDR, DE, ISG, Log2FC
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
go.result_df <- read_tsv("data/Figure_5/Tcells_GO_full.tsv")
## Rows: 269 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (6): Cell.type, ID, Description, GeneRatio, BgRatio, geneID
## dbl (4): pvalue, p.adjust, qvalue, Count
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
go.result.simpl_df <- read_tsv("data/Figure_5/Tcells_GO_simplified.tsv")
## Rows: 19 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (6): Cell.type, ID, Description, GeneRatio, BgRatio, geneID
## dbl (4): pvalue, p.adjust, qvalue, Count
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
de_data$SYMBOL <- str_split(de_data$SYMBOL, "; ", Inf, T)[,1]
list_names <- unique(de_data$Cell.type)
de_data_list <- group_split(de_data, Cell.type)
de_data_list <- as.list(de_data_list)
names(de_data_list) <- list_names

hsGO <- godata('org.Rn.eg.db', ont="BP")
## Warning in godata("org.Rn.eg.db", ont = "BP"): use 'annoDb' instead of 'OrgDb'
## preparing gene to GO mapping data...
## preparing IC data...
go.result.simpl_df$GeneRatioDecimal <- sapply(go.result.simpl_df$GeneRatio, function(x) {eval(parse(text=x))})

data_to_plot <- go.result.simpl_df %>%
  group_by(Cell.type) %>%
  slice_min(order_by = p.adjust, n = 25)
data_to_plot <- go.result.simpl_df[go.result.simpl_df$ID %in% go.result.simpl_df$ID,]
data_to_plot <- data_to_plot %>%
  mutate(log10p = -log10(p.adjust),
         DescriptionTrunc = StrTrunc(Description, maxlen = 60, ellipsis = "...", wbound = FALSE))

sim_mtx <- mgoSim(unique(data_to_plot$ID), unique(data_to_plot$ID), semData=hsGO, measure="Wang", combine=NULL)
dist_mtx <- 1/sim_mtx
col_clust <- hclust(as.dist(dist_mtx))
ddgram_col <- as.dendrogram(col_clust)
ggtree_plot_col <- ggtree(ddgram_col)

sc <- scale_colour_gradient2(
    low = "white",
    mid = "white",
    high = scales::muted("red")
  )

data_to_plot$ID <- factor(data_to_plot$ID, levels=col_clust$labels[col_clust$order])

dotplot <- ggplot(data_to_plot, aes(x=Cell.type, y = DescriptionTrunc, color = log10p, size = GeneRatioDecimal)) + 
  geom_point() +
  geom_point(aes(size=GeneRatioDecimal), shape = 21, colour="black", stroke=0.5) +
  cowplot::theme_cowplot() +
  theme(axis.text.x = element_text(angle = 90,  vjust = 0.5, hjust=1),
        axis.line = element_blank()) +
  ylab('') +
  theme(axis.ticks = element_blank()) +
  scale_y_discrete(position = "right") +
  sc

ggtree_plot_col <- ggtree_plot_col + aplot::ylim2(dotplot)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
P_F <- ggtree_plot_col +
  dotplot + 
  plot_layout(ncol = 2, widths = c(0.7, 4))

P_F

S3

markers_df <- read_tsv("data/Figure_5/marker_genes.txt")
## Rows: 46 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): gene, gene.corresp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
color_mapping <- colorRampPalette(brewer.pal(9, "Set1"))(21)[1:18]
names(color_mapping) <- levels(seurat_obj$cell.type)
markers_df$Color <- color_mapping[match(markers_df$gene.corresp, names(color_mapping))]
markers_df <- markers_df %>%
  replace_na(list(Color = "#b1b1b1")) %>%
  mutate(y.label = paste("<span style = 'color:",
                         Color,
                         ";'>",
                         gene,
                         "</span>", sep = ""))

data.to.plot <- dot_plot(object = seurat_obj[,seurat_obj$genotype=="HE"],
        assay = "RNA",
        features = markers_df$gene,
        cluster.idents = TRUE,
        group.by = "cell.type") %>%
  dplyr::rename(`% Expressing` = pct.exp, `Cell type` = id) %>%
  left_join(markers_df, by = c("features.plot" = "gene"))
## Warning: The following requested variables were not found: Csfr1, Cd51
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
data.to.plot$features.plot <- factor(data.to.plot$features.plot, levels = unique(markers_df$gene))

data.to.plot <- data.to.plot %>%
  filter(`% Expressing` > 1)
  
data.to.plot$y.label <- factor(data.to.plot$y.label, levels = markers_df$y.label)
data.to.plot$`Cell type` <- factor(data.to.plot$`Cell type`, levels = (levels(seurat_obj$cell.type)))

dotplot <- data.to.plot %>%
  ggplot(aes(y=features.plot, x = `Cell type`, color = avg.exp.scaled, size = `% Expressing`)) + 
  geom_point() +
  geom_point(aes(size=`% Expressing`), shape = 21, colour="black", stroke=0.5) +
  theme_cowplot(13) +
  scale_radius(limits = c(0, 100)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        axis.text.y = element_markdown(),
        axis.line = element_blank()) +
  ylab(NULL) +
  xlab(NULL) +
  theme(axis.ticks = element_blank()) +
  scale_x_discrete(position = "top") +
  viridis::scale_colour_viridis(option="magma") +
  guides(size=guide_legend(override.aes=list(shape=21, colour="black", fill="white")))

labels <- ggplot(data.to.plot %>%
                   distinct(`Cell type`),
                 aes(y = factor(1), x = `Cell type`, fill = `Cell type`)) + 
  geom_tile() +
  scale_fill_manual(values = color_mapping) + 
  theme_nothing() +
  xlim2(dotplot) +
  xlab(NULL)

P <- labels +
  dotplot + 
  plot_layout(nrow = 2, heights = c(.2, 9))

P

S4A

# This ISG list is from Interferome DB
rat_isgs <- read.table("data/Figure_5/rat_isgs.tsv", header = TRUE, sep = "\t")

# DE analysis genes
de.results <- read.table("data/Figure_5/DE_results.tsv", header = TRUE, sep = "\t")

# Preselecting cell types
selected_cell_types <- de.results %>% filter(FDR < 0.05) %>% group_by(Cell.type) %>% summarise(Count = n()) %>% pull(Cell.type)

# Enrichment of ISGs in downregulated genes
enrichment_df = data.frame()
for (Cell_type in selected_cell_types) {
  de.results_sub <- de.results %>%
    dplyr::filter(Cell.type == Cell_type) %>%
    mutate(DE = ifelse(FDR < 0.05 & logFC < 1, 1, 0)) %>%
    dplyr::select(Gene.name, DE)
  
  rat_de_isg <- de.results_sub %>%
    left_join(rat_isgs, by=c("Gene.name"="Rat_symbol")) %>%
    replace_na(list(ISG = 0))
  
  contingency.table <- table(rat_de_isg$ISG, rat_de_isg$DE)
  
  if (ncol(contingency.table) < 2) {
    enrichment_df <- rbind(
      enrichment_df,
      data.frame(Cell.type = Cell_type,
                 pval = NA,
                 OR = NA,
                 CI.low = NA,
                 CI.hi = NA))
    next
  }
  
  test <- fisher.test(contingency.table)
  
  enrichment_df <- rbind(
    enrichment_df,
    data.frame(Cell.type = Cell_type,
               pval = test$p.value,
               OR = test$estimate,
               CI.low = test$conf.int[1],
               CI.hi = test$conf.int[2])
  )
}

enrichment_df$p.adj <- p.adjust(enrichment_df$pval)

enrichment_df <- enrichment_df %>%
  mutate(Col = if_else(p.adj < .05, "p<0.05", "p>0.05"))

col_vec <- as.vector(met.brewer("Nizami", 5)[4])

enrichment_df <- enrichment_df %>%
  mutate(hjust = ifelse(CI.hi==Inf, 1.2, 1.2),
         hjust = ifelse(is.na(CI.hi), NA, hjust))

enrichment_df$Cell.type <- factor(enrichment_df$Cell.type, levels = enrichment_df$Cell.type[order(enrichment_df$OR)])

P_S4A <- ggplot(enrichment_df, aes(x = Cell.type, y = OR, ymin = CI.low, ymax = CI.hi, color=Col)) + 
  geom_linerange() + 
  geom_pointrange() +
  geom_text(aes(label = formatC(p.adj, format = "e")), hjust=enrichment_df$hjust, vjust=-.4) +
  scale_y_log10() +
  coord_flip() +
  #geom_hline(yintercept = 1, linetype = "dashed") +
  theme_cowplot(16) +
  #ylim(0, 700) + 
  scale_color_manual(values = c(as.vector(met.brewer("Nizami", 5)[4]),
             "grey")) +
  ylab("OR") +
  xlab(NULL) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

P_S4A

S3

gntp_col <- met.brewer("Nizami", 2)[c(2, 1)]
names(gntp_col) <- c("HE", "KO")

P_S3 <- seurat_obj@meta.data %>%
  group_by(genotype, seq_folder, cell.type) %>%
  summarise(Count = n()) %>%
  mutate(Freq = Count/sum(Count)) %>%
  ggplot(aes(cell.type, Freq, fill = genotype)) +
  stat_summary(geom = "bar", fun = mean, position = "dodge") +
  stat_summary(geom = "errorbar", fun.data = mean_se, position = "dodge") +
  geom_point(position = position_dodge(0.8), alpha=0.3, size = 0.5) +
  scale_fill_manual(values = gntp_col) +
  theme_cowplot() +
  theme(axis.text.x = element_text(angle = 90,  vjust = 0.5, hjust=1)) +
  labs(x = NULL, y = "Frequency")
## `summarise()` has grouped output by 'genotype', 'seq_folder'. You can override
## using the `.groups` argument.
P_S3

S4B

p1 <- plot_scatter("Activated NK cell")
## As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.
## This message is displayed once per session.
p2 <- plot_scatter("Macrophage (M1-like)")
p3 <- plot_scatter("Neutrophil")
p4 <- plot_scatter("Naive CD4+ T cell")
p5 <- plot_scatter("pDC")
p6 <- plot_scatter("NK cell")
p7 <- plot_scatter("FO B cell")
p8 <- plot_scatter("CD8+ Tcm cell")
p9 <- plot_scatter("Memory B cell 2")

P_S4B <- p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9 + plot_layout(ncol = 3)
P_S4B
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.
## Warning in transformation$transform(coord_limits): NaNs produced
## Warning in transformation$transform(coord_limits): NaNs produced
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Removed 7 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

sessionInfo()
## R version 4.4.3 (2025-02-28)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.4
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Indiana/Indianapolis
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggtext_0.1.2           aplot_0.2.5            viridis_0.6.5         
##  [4] viridisLite_0.4.2      ggdendro_0.2.0         ggtree_3.14.0         
##  [7] GOSemSim_2.32.0        org.Rn.eg.db_3.20.0    AnnotationDbi_1.68.0  
## [10] IRanges_2.40.1         S4Vectors_0.44.0       Biobase_2.66.0        
## [13] BiocGenerics_0.52.0    enrichplot_1.26.6      clusterProfiler_4.14.6
## [16] msigdbr_10.0.2         circlize_0.4.16        ComplexHeatmap_2.22.0 
## [19] ggpubr_0.6.0           ggrepel_0.9.6          scales_1.3.0          
## [22] DescTools_0.99.60      patchwork_1.3.0        cowplot_1.1.3         
## [25] MetBrewer_0.2.0        lubridate_1.9.4        forcats_1.0.0         
## [28] stringr_1.5.1          dplyr_1.1.4            purrr_1.0.4           
## [31] readr_2.1.5            tidyr_1.3.1            tibble_3.2.1          
## [34] ggplot2_3.5.2          tidyverse_2.0.0        RColorBrewer_1.1-3    
## [37] Seurat_5.2.1           SeuratObject_5.0.2     sp_2.2-0              
## 
## loaded via a namespace (and not attached):
##   [1] fs_1.6.6                matrixStats_1.5.0       spatstat.sparse_3.1-0  
##   [4] httr_1.4.7              doParallel_1.0.17       tools_4.4.3            
##   [7] sctransform_0.4.1       backports_1.5.0         R6_2.6.1               
##  [10] lazyeval_0.2.2          uwot_0.2.3              GetoptLong_1.0.5       
##  [13] litedown_0.7            withr_3.0.2             gridExtra_2.3          
##  [16] progressr_0.15.1        cli_3.6.4               Cairo_1.6-2            
##  [19] spatstat.explore_3.4-2  fastDummies_1.7.5       labeling_0.4.3         
##  [22] sass_0.4.10             mvtnorm_1.3-3           spatstat.data_3.1-6    
##  [25] proxy_0.4-27            ggridges_0.5.6          pbapply_1.7-2          
##  [28] commonmark_1.9.5        yulab.utils_0.2.0       gson_0.1.0             
##  [31] DOSE_4.0.1              R.utils_2.13.0          parallelly_1.43.0      
##  [34] readxl_1.4.5            rstudioapi_0.17.1       RSQLite_2.3.9          
##  [37] gridGraphics_0.5-1      generics_0.1.3          shape_1.4.6.1          
##  [40] vroom_1.6.5             ica_1.0-3               spatstat.random_3.3-3  
##  [43] car_3.1-3               GO.db_3.20.0            Matrix_1.7-3           
##  [46] abind_1.4-8             R.methodsS3_1.8.2       lifecycle_1.0.4        
##  [49] yaml_2.3.10             carData_3.0-5           qvalue_2.38.0          
##  [52] Rtsne_0.17              blob_1.2.4              promises_1.3.2         
##  [55] crayon_1.5.3            ggtangle_0.0.6          miniUI_0.1.2           
##  [58] lattice_0.22-7          haven_2.5.4             KEGGREST_1.46.0        
##  [61] pillar_1.10.2           knitr_1.50              fgsea_1.32.4           
##  [64] rjson_0.2.23            boot_1.3-31             gld_2.6.7              
##  [67] future.apply_1.11.3     codetools_0.2-20        fastmatch_1.1-6        
##  [70] glue_1.8.0              ggfun_0.1.8             spatstat.univar_3.1-2  
##  [73] data.table_1.17.0       treeio_1.30.0           vctrs_0.6.5            
##  [76] png_0.1-8               spam_2.11-1             cellranger_1.1.0       
##  [79] gtable_0.3.6            assertthat_0.2.1        cachem_1.1.0           
##  [82] xfun_0.52               mime_0.13               survival_3.8-3         
##  [85] iterators_1.0.14        fitdistrplus_1.2-2      ROCR_1.0-11            
##  [88] nlme_3.1-168            bit64_4.6.0-1           RcppAnnoy_0.0.22       
##  [91] GenomeInfoDb_1.42.3     bslib_0.9.0             irlba_2.3.5.1          
##  [94] KernSmooth_2.23-26      colorspace_2.1-1        DBI_1.2.3              
##  [97] Exact_3.3               tidyselect_1.2.1        bit_4.6.0              
## [100] compiler_4.4.3          expm_1.0-0              xml2_1.3.8             
## [103] plotly_4.10.4           msigdbdf_24.1.0         lmtest_0.9-40          
## [106] digest_0.6.37           goftest_1.2-3           spatstat.utils_3.1-3   
## [109] rmarkdown_2.29          XVector_0.46.0          htmltools_0.5.8.1      
## [112] pkgconfig_2.0.3         fastmap_1.2.0           rlang_1.1.6            
## [115] GlobalOptions_0.1.2     htmlwidgets_1.6.4       UCSC.utils_1.2.0       
## [118] shiny_1.10.0            farver_2.1.2            jquerylib_0.1.4        
## [121] zoo_1.8-14              jsonlite_2.0.0          BiocParallel_1.40.2    
## [124] R.oo_1.27.0             magrittr_2.0.3          ggplotify_0.1.2        
## [127] Formula_1.2-5           GenomeInfoDbData_1.2.13 dotCall64_1.2          
## [130] munsell_0.5.1           Rcpp_1.0.14             ape_5.8-1              
## [133] babelgene_22.9          reticulate_1.42.0       stringi_1.8.7          
## [136] rootSolve_1.8.2.4       zlibbioc_1.52.0         MASS_7.3-65            
## [139] plyr_1.8.9              parallel_4.4.3          listenv_0.9.1          
## [142] lmom_3.2                deldir_2.0-4            Biostrings_2.74.1      
## [145] splines_4.4.3           gridtext_0.1.5          tensor_1.5             
## [148] hms_1.1.3               igraph_2.1.4            spatstat.geom_3.3-6    
## [151] markdown_2.0            ggsignif_0.6.4          RcppHNSW_0.6.0         
## [154] reshape2_1.4.4          evaluate_1.0.3          tzdb_0.5.0             
## [157] foreach_1.5.2           httpuv_1.6.16           RANN_2.6.2             
## [160] polyclip_1.10-7         future_1.40.0           clue_0.3-66            
## [163] scattermore_1.2         broom_1.0.8             xtable_1.8-4           
## [166] tidytree_0.4.6          e1071_1.7-16            RSpectra_0.16-2        
## [169] rstatix_0.7.2           later_1.4.2             class_7.3-23           
## [172] memoise_2.0.1           cluster_2.1.8.1         timechange_0.3.0       
## [175] globals_0.17.0